
# This is an R script for the replication of the second application found in the following paper:
#
#       ===================================================================================================================
#        Pomeroy, C (2018) "The quantitative analysis of space policy: A review of current methods and future directions," 
#          Space Policy.
#       ===================================================================================================================
#
# R can be obtained from: http://www.r-project.org.
# Feel free to reach out with questions. Happy coding.
#
#  -Caleb
#   pomeroy.38@osu.edu
#   Department of Political Science, The Ohio State University


# ------ Load packages ------ #
setwd("YOUR DIRECTORY") # set your working directory to folder that contains the downloaded materials

require(devtools) # Install any of these packages if you don't have them already...
if (!require("readtext")) devtools::install_github("kbenoit/readtext")
library(quanteda) #install_version("quanteda", version = "1.1.1", repos = "http://cran.us.r-project.org") # the analysis was running using this version of quanteda
library(GERGM) #install_version("GERGM", version = "0.11.2", repos = "http://cran.us.r-project.org") # ...and this version of GERGM
library(stringr)
library(ggplot2)
library(grid)
library(igraph)

set.seed(1912) # set seed for replicability




#                    Text Analysis Bit 
# ======================================================= #


# ------ Data Import ------ #

data_dir <- "YOUR PATH"  # download or access spaceTexts from https://doi.org/10.7910/DVN/1WJLY8


spaceTexts_files <- readtext(paste0(data_dir, "Sessions/*"), 
                             docvarsfrom = "filenames", 
                             dvsep="_", 
                             docvarnames = c("Name", "Session", "Year", "Actor", "Speech"))


spaceTexts_corpus <- corpus(spaceTexts_files, text_field = "text") 



# ------ Add NATO/Warsaw docvar ------ #

NATO <- c("BEL", "CAN",  "DNK", "FRA", "GRC", "ISL", "ITA", 
          "LUX", "NLD", "NOR", "PRT", "TUR", "GBR", "USA", "DEU")

WARSAW <- c("RUS", "ALB", "ROU", "BGR", "CSK", "DDR", "HUN", "POL") 

docvars(spaceTexts_corpus, "Alliance") <- ifelse(spaceTexts_corpus$documents$Name %in% NATO, "NATO", 
                                                 ifelse(spaceTexts_corpus$documents$Name %in% WARSAW, "Warsaw", 
                                                        "Nonaligned"))

summary(spaceTexts_corpus)



# ------ Wordscores Model ------ #

# -- Preprocess for given period:

# these first two lines are to replicate first time period of wordscores from paper
corp.s <- corpus_subset(spaceTexts_corpus, Year >= 1961 & Year <= 1969) # 1961 is first year in corpus
corp.sub <- corpus_subset(corp.s, Actor == "S") # keep only the state actors


# unhash the below two lines to run same thing for second period in paper
#corp.s <- corpus_subset(spaceTexts_corpus, Year >= 1970 & Year <= 1979) 
#corp.sub <- corpus_subset(corp.s, Actor == "S")

tok <- tokens(corp.sub, what = "word",
              remove_punct = TRUE,
              remove_symbol = TRUE,
              remove_numbers = TRUE,
              remove_twitter = TRUE,
              remove_url = TRUE,
              remove_hyphens = TRUE,
              verbose = TRUE,
              include_docvars = TRUE)

dfm <- dfm(tok, 
           groups = "Name",
           tolower = TRUE,
           remove = stopwords("english"),
           stem=TRUE, 
           verbose = TRUE)

dfm.trim <- dfm_trim(dfm, 
                     min_count = 5, 
                     min_docfreq = 2, 
                     max_docfreq = .9)

dfm.w <- dfm_tfidf(dfm.trim)



# -- Set reference scores:

refscores <- rep(NA,nrow(dfm.w))
refscores[str_detect(rownames(dfm.w), "RUS")] <- -1
refscores[str_detect(rownames(dfm.w), "USA")] <- 1


# -- Run Wordscores model:

ws <- textmodel_wordscores(dfm.w, refscores, scale="linear", smooth=1)

wordscore <- predict(ws, rescaling="none")

dv <- subset(docvars(tok), !duplicated(corp.sub$documents$Name))
dv <- data.frame(dv[order(dv$Name),], wordscore)


# -- Plot 'em:

ggplot(dv, aes(x = reorder(Name, wordscore), y = wordscore)) +
  geom_point(aes(shape=Alliance, col=Alliance), size = 2) +
  scale_color_manual(values=c("steelblue4", "azure3", "indianred3")) +
  scale_shape_manual(values=c(3,3,3)) +
  coord_flip() + 
  theme_light() +
  labs(y = "Wordscore", x = "State")
grid.force() 
grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd=2.5)) # make points easier to see









#                 Network Analysis Bit
# ======================================================= #

# --- Import required data:

copuos_sims <- readRDS("copuos_sims.rds") # N x N adjacency matrix weighted by cosine similarities between state speeches
unga_votes <- readRDS("unga_votes.rds") # N x N adjacency matrix weighted by similarities between UNGA voting ideal points
bilat_treaty_nw <- readRDS("bilat_treaty_nw.rds") # N x N adjacency matrix weighted by counts of bilateral space cooperation agreements
node_data <- readRDS("node_data.rds") # A data frame that contains info on alliance membership and indicator for space program
treaty_sims <- readRDS("treaty_sims.rds") # N x N adjacency matrix weighted by counts of the same decisions on each of the 5 space treaties



# --- Descriptive stuff --- #

graph.sim <- graph_from_adjacency_matrix(bilat_treaty_nw[which(rowSums(bilat_treaty_nw)>0),
                                                         which(colSums(bilat_treaty_nw)>0)],
                                                             mode = "undirected", 
                                                             diag = F, 
                                                             weighted = T)

centr_degree(graph.sim, mode = "all") # calculate degree centrality

V(graph.sim)$color <- ifelse(V(graph.sim)$name %in% NATO, "steelblue4", # color nodes for plotting
                              ifelse(V(graph.sim)$name %in% WARSAW, "indianred3", "azure4"))

plot(graph.sim, # plot it
     edge.curved=F, 
     edge.width = E(graph.sim)$weight/2, 
         vertex.label.family = "sans",
         vertex.label=V(graph.sim)$name,
         vertex.label.color="white", 
         vertex.size =11, 
         vertex.frame.color=NA, 
         vertex.label.cex=.8,
             layout = layout_nicely) 




# --- Inference w/ GERGM --- #

# Specify the model
model <- treaty_sims ~ edges + netcov(copuos_sims) + netcov(unga_votes) + 
  netcov(bilat_treaty_nw) + nodematch("NATO") + nodematch("Warsaw") + nodematch("Program") + twostars


# Run the model
gergm.1 <- gergm(model, 
                  number_of_networks_to_simulate = 300000,
                  hyperparameter_optimization = T,
                  covariate_data = node_data,
                  thin = 1/100,
                  MCMC_burnin = 10000,
                  seed = 1912,
                  convergence_tolerance = 0.5,
                  network_is_directed = FALSE,
                  convex_hull_proportion = .9)

# For more info on the gergm function's arguments, check out ?GERGM::gergm
# or https://cran.r-project.org/web/packages/GERGM/vignettes/getting_started.html


# It's also possible to skip the wait time and directly load the model results presented in the paper...
#gergm.1 <- readRDS("gergm.model.1.rds") 




# --- Check model goodness-of-fit:

GOF(gergm.1) # check simulated vs observed statistics

Trace_Plot(gergm.1) # check whether sampler has converged



# --- Check hysteresis:

hysteresis_results.1 <- hysteresis(gergm.1, # check for degeneracy
                                   networks_to_simulate = 10000,
                                   burnin = 300,
                                   range = 8,
                                   steps = 20,
                                   simulation_method = "Metropolis",
                                   proposal_variance = 0.05)

hysteresis_plot(hysteresis_results.1) # plot it



# --- Check MSE:

cond.1 <- conditional_edge_prediction(  #...but how useful?
              GERGM_Object = gergm.1,
              number_of_networks_to_simulate = 100,
              thin = 1,
              proposal_variance = 0.05,
              MCMC_burnin = 100,
              seed = 1912)

MSE_results.1 <- conditional_edge_prediction_MSE(cond.1)



                    
# --------------------- THE END. --------------------- #